# Load the names-by-year table and reshape the rows for the name 'Peter'
# into long form, keeping only the Utah and Oregon entries.
NAMES_YEAR_URL = "https://raw.githubusercontent.com/byuidatascience/data4names/master/data-raw/names_year/names_year.csv"
names_df = pd.read_csv(NAMES_YEAR_URL)

# Boolean mask selecting the rows whose 'name' column is exactly 'Peter'.
is_peter = names_df['name'] == 'Peter'
peter_df = names_df.loc[is_peter]

# Every column other than 'year' and 'name' is unpivoted: the former column
# header goes into 'state' and the cell value into 'Count'.
# NOTE(review): presumably those columns are per-state counts -- confirm
# against the CSV header.
long_df = pd.melt(
    peter_df,
    id_vars=['year', 'name'],
    var_name='state',
    value_name='Count',
)

# Keep only the two states that are plotted.
wanted_states = ['UT', 'OR']
long_df = long_df.loc[long_df['state'].isin(wanted_states)]
# Annotation table for the text labels: one row per label, holding the x
# position ('year'), the text to print ('label') and the y position ('y').
# Each x position is 3 less than the year shown in its label.
vlines_df = pd.DataFrame(
    [
        (1933, '1936', 50),
        (1969, '1972', 85),
        (2002, '2005', 75),
    ],
    columns=['year', 'label', 'y'],
)

# X-axis ticks every 5 years from 1910 through 2020; only ticks on a
# multiple of 10 get a printed label, the others get an empty string.
x_breaks = [*range(1910, 2021, 5)]
x_labels = ['' if tick % 10 else str(tick) for tick in x_breaks]
# Assemble the chart layer by layer: 'Count' against 'year', one coloured
# line per value of 'state' (treated as a discrete variable).
p = ggplot(long_df, aes(x='year', y='Count', color=as_discrete('state')))
p = p + geom_line(size=1.2)

# One black vertical reference line per highlighted year.
for marker_year in (1936, 1972, 2005):
    p = p + geom_vline(xintercept=marker_year, color='black')

# Text annotations, positioned and worded by the rows of vlines_df.
p = p + geom_text(
    data=vlines_df,
    mapping=aes(x='year', y='y', label='label'),
    vjust=-1,
    color='black',
)

# Title and axis captions.
p = p + ggtitle("The history of Peter for Utah (red) and Oregon (orange)")
p = p + xlab("Year name given")
p = p + ylab("Count of Peter")

# Axis ticks and the colour mapping; the colours match the wording of the
# title (UT red, OR orange) and the legend is given no title.
p = p + scale_x_continuous(breaks=x_breaks, labels=x_labels)
p = p + scale_color_manual(values={'UT': 'red', 'OR': 'orange'}, name=None)

# Final presentation tweaks: x tick text styling and overall figure size.
p = p + theme(axis_text_x=element_text(angle=0, hjust=1))
p = p + ggsize(1200, 500)